%include "Q:\PU2\Macros\hashmerge.sas";

libname input "Q:\External Research Data\AutoPay";
libname output "Q:\PU2\New folder";
options threads cpucount=actual;


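*This program loops over all months from January 2011 through January 2020, collapsing each month of
worker records to one row per yr_month-state_bin-client_code cell
(NOTE for review: no salaried bin is built here and the per-cell employment count is dropped in collapse_firm - confirm this is intended);
*Each loop iteration takes about 3 minutes;
*The resulting dataset is then used to construct the panel of distributions;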

/* Clean one month of data.
   Parameters:
     y - two-digit year suffix of the input table name (input.ap_20<y><m>)
     m - two-digit month suffix of the input table name
   Reads input.ap_20<y><m>, keeps the rows whose emp_stat_c is not "T", and writes
   work table workers_base<y><m> with yr_month, client_code and a derived state_bin. */
%macro clean(y=,m=);
	data workers_base&y.&m.(keep=yr_month state_bin client_code);
		set input.ap_20&y&m(where=(emp_stat_c~="T") 
			rename=(addr_st_c=state) keep=addr_st_c emp_stat_c yr_month client_code);

		/* Shorten numeric storage. Declared after SET so the variable order of the output is unchanged. */
		length state_bin 3 yr_month 4;

		/* state_bin: 2 = AK, 3 = CA, 4 = NY, 1 = every other value of state (including blank). */
		select (state);
			when ("AK") state_bin=2;
			when ("CA") state_bin=3;
			when ("NY") state_bin=4;
			otherwise   state_bin=1;
		end;
	run;
%mend clean;

/* Collapse one month of cleaned data.
   Parameters:
     data - name prefix of the input work table (the table read is <data><y><m>)
     y, m - year and month suffixes of the table name
   Writes collapsed_adp<y><m> with one row per observed combination of
   yr_month, state_bin and client_code. */
%macro collapse_firm(data=,y=,m=);
	/* NWAY keeps only the full three-way crossing of the CLASS variables.
	   The automatic _TYPE_ and _FREQ_ columns are dropped, so the output carries
	   the cell keys only and not the number of records in each cell.
	   NOTE(review): confirm the count is really not needed downstream. */
	proc summary nway data=&data.&y.&m.;
		class yr_month state_bin client_code;
		output out=collapsed_adp&y.&m.(drop=_type_ _freq_);
	run;
%mend collapse_firm;

/* Loop over consecutive months: clean each month, collapse it, and append the
   result to output.firm_list.
   Parameters:
     y     - two-digit year of the reference month (for example 11 for 2011)
     m     - month number of the reference month (a leading zero is allowed)
     start - first month offset from the reference month (0 is the reference month itself)
     end   - last month offset, inclusive
   For every offset the macro calls clean and collapse_firm, appends
   collapsed_adp<yy><mm> to output.firm_list, deletes the two temporary tables,
   and prints the elapsed time of the iteration to the log. */
%macro loop_clean(y=,m=,start=,end=);
	/* Keep all working macro variables local so that a caller using the same
	   names (for example its own loop index i) is not overwritten. */
	%local i month_idx year_off yy mm _timer_start;

	%do i=&start. %to &end.;
		%let _timer_start=%sysfunc(datetime());

		/* month_idx: zero-based count of months since January of year y.
		   year_off : whole years contained in month_idx.
		   yy, mm   : two-digit year and month suffixes of the table to process.
		              yy is zero-padded so that single-digit years still match the ap_20yymm naming. */
		%let month_idx=%eval(&m+&i-1);
		%let year_off=%sysevalf(&month_idx/12,floor);
		%let yy=%sysfunc(putn(%eval(&y+&year_off),z2.));
		%let mm=%sysfunc(putn(%sysevalf(1+&month_idx-12*&year_off),z2.));

		%clean(y=&yy,m=&mm);

		/* Collapse the cleaned month and add it to the accumulated list.
		   FORCE lets the append proceed when attributes differ from the base table. */
		%collapse_firm(data=workers_base,y=&yy.,m=&mm.);
		proc append base=output.firm_list data=collapsed_adp&yy.&mm. force; run;

		/* Delete the temporary files. QUIT ends PROC DATASETS explicitly instead of
		   leaving it active until the next step starts. */
		proc datasets nolist;
			delete collapsed_adp&yy.&mm. workers_base&yy.&mm.;
		run;
		quit;

		/* Report how long this iteration took. */
		data _null_;
			dur=datetime()-&_timer_start;
			put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
		run;
	%end;
%mend loop_clean;

/* Create an empty output.firm_list shell (zero observations) so that every run
   starts from a clean base table with the intended variable attributes. */
data output.firm_list;
	length client_code $ 14 yr_month 4 state_bin 3;
	format yr_month mmddyy10.;
	/* Touch the variables to avoid uninitialized-variable notes, then stop
	   before any observation is written. */
	call missing(client_code, yr_month, state_bin);
	stop;
run;

/* Process offsets 0 through 108 from January 2011, that is January 2011 through January 2020. */
%loop_clean(y=11,m=01,start=0,end=108);